Skip to content

Conversation

HolyMolyCowMan
Copy link
Contributor

This change adds a new folding pattern, folding a G_FPEXT(G_FCONSTANT) to a G_FCONSTANT.

To make this work on AArch64, the G_FCONSTANT should not be widened due to the G_FCONSTANT being converted to a G_CONSTANT. This should fix some other floating point combines when the G_FCONSTANT is widened due to being an fp16.

@llvmbot
Copy link
Member

llvmbot commented Sep 26, 2025

@llvm/pr-subscribers-llvm-globalisel
@llvm/pr-subscribers-backend-aarch64

@llvm/pr-subscribers-backend-amdgpu

Author: Ryan Cowan (HolyMolyCowMan)

Changes

This change adds a new folding pattern, folding a G_FPEXT(G_FCONSTANT) to a G_FCONSTANT.

To make this work on AArch64, the G_FCONSTANT should not be widened due to the G_FCONSTANT being converted to a G_CONSTANT. This should fix some other floating point combines when the G_FCONSTANT is widened due to being an fp16.


Patch is 302.03 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/160902.diff

23 Files Affected:

  • (modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+2)
  • (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp (+1)
  • (modified) llvm/lib/Target/AArch64/AArch64Combine.td (+1-1)
  • (modified) llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (+2-2)
  • (modified) llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir (+3-2)
  • (modified) llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir (+3-3)
  • (modified) llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll (+2-5)
  • (modified) llvm/test/CodeGen/AArch64/dup.ll (+20-10)
  • (modified) llvm/test/CodeGen/AArch64/f16-instructions.ll (+10-11)
  • (modified) llvm/test/CodeGen/AArch64/fcvt-fixed.ll (+176-385)
  • (modified) llvm/test/CodeGen/AArch64/frem-power2.ll (+1-2)
  • (modified) llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll (+11-41)
  • (modified) llvm/test/CodeGen/AArch64/vecreduce-fadd.ll (+18-24)
  • (modified) llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll (+6-24)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll (+1501-1716)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll (+7-10)
  • (modified) llvm/test/CodeGen/AMDGPU/fmed3.ll (+18-28)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.exp.ll (+6-9)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.exp10.ll (+6-9)
  • (modified) llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll (+5-9)
  • (modified) llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll (+32-44)
  • (modified) llvm/test/CodeGen/AMDGPU/maximumnum.ll (+1-2)
  • (modified) llvm/test/CodeGen/AMDGPU/minimumnum.ll (+1-2)
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 204e1f6887fa2..57828a270ec00 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -694,6 +694,7 @@ def constant_fold_fabs : constant_fold_unary_fp_op_rule<G_FABS>;
 def constant_fold_fsqrt : constant_fold_unary_fp_op_rule<G_FSQRT>;
 def constant_fold_flog2 : constant_fold_unary_fp_op_rule<G_FLOG2>;
 def constant_fold_fptrunc : constant_fold_unary_fp_op_rule<G_FPTRUNC>;
+def constant_fold_fpext : constant_fold_unary_fp_op_rule<G_FPEXT>;
 
 // Fold constant zero int to fp conversions.
 class itof_const_zero_fold_rule<Instruction opcode> : GICombineRule <
@@ -712,6 +713,7 @@ def constant_fold_fp_ops : GICombineGroup<[
   constant_fold_fsqrt,
   constant_fold_flog2,
   constant_fold_fptrunc,
+  constant_fold_fpext,
   itof_const_zero_fold_si,
   itof_const_zero_fold_ui
 ]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 0ebee2cfd8688..2206a558f9f4c 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1728,6 +1728,7 @@ static APFloat constantFoldFpUnary(const MachineInstr &MI,
     Result.clearSign();
     return Result;
   }
+  case TargetOpcode::G_FPEXT:
   case TargetOpcode::G_FPTRUNC: {
     bool Unused;
     LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 076a6235eef0a..121ed198a5958 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -351,7 +351,7 @@ def AArch64PostLegalizerLowering
 // Post-legalization combines which are primarily optimizations.
 def AArch64PostLegalizerCombiner
     : GICombiner<"AArch64PostLegalizerCombinerImpl",
-                       [copy_prop, cast_of_cast_combines, 
+                       [copy_prop, cast_of_cast_combines, constant_fold_fp_ops, 
                         buildvector_of_truncate, integer_of_truncate,
                         mutate_anyext_to_zext, combines_for_extload, 
                         combine_indexed_load_store, sext_trunc_sextload,
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index ea2196a584127..5613364626692 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -678,8 +678,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .widenScalarToNextPow2(0)
       .clampScalar(0, s8, s64);
   getActionDefinitionsBuilder(G_FCONSTANT)
-      .legalFor({s32, s64, s128})
-      .legalFor(HasFP16, {s16})
+      // Always legalize S16 to prevent G_FCONSTANT being widened to G_CONSTANT
+      .legalFor({s16, s32, s64, s128})
       .clampScalar(0, MinFPScalar, s128);
 
   // FIXME: fix moreElementsToNextPow2
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir
index c301e76852b54..c00ce2242a888 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir
@@ -48,8 +48,9 @@ body: |
     ; CHECK-NEXT: $w0 = COPY [[C]](s32)
     ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
     ; CHECK-NEXT: $x0 = COPY [[C1]](s64)
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK-NEXT: $w0 = COPY [[C2]](s32)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C2]](s16)
+    ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
     %0:_(s32) = G_FCONSTANT float 1.0
     $w0 = COPY %0
     %1:_(s64) = G_FCONSTANT double 2.0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir
index ddf219dc4927e..c6df3456a8445 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir
@@ -8,7 +8,7 @@ tracksRegLiveness: true
 body: |
   bb.0:
     ; NO-FP16-LABEL: name: fp16
-    ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 0
+    ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH0000
     ; NO-FP16-NEXT: $h0 = COPY %cst(s16)
     ; NO-FP16-NEXT: RET_ReallyLR implicit $h0
     ;
@@ -26,7 +26,7 @@ tracksRegLiveness: true
 body: |
   bb.0:
     ; NO-FP16-LABEL: name: fp16_non_zero
-    ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 16384
+    ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH4000
     ; NO-FP16-NEXT: $h0 = COPY %cst(s16)
     ; NO-FP16-NEXT: RET_ReallyLR implicit $h0
     ;
@@ -44,7 +44,7 @@ tracksRegLiveness: true
 body:             |
   bb.1.entry:
     ; NO-FP16-LABEL: name: nan
-    ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 31745
+    ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH7C01
     ; NO-FP16-NEXT: %ext:_(s32) = G_FPEXT %cst(s16)
     ; NO-FP16-NEXT: $w0 = COPY %ext(s32)
     ; NO-FP16-NEXT: RET_ReallyLR implicit $w0
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
index cb5df07c7ede4..e8e563135acc5 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
@@ -739,15 +739,12 @@ define ptr @postidx32_shalf(ptr %src, ptr %out, half %a) {
 ;
 ; GISEL-LABEL: postidx32_shalf:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov w8, #0 ; =0x0
 ; GISEL-NEXT:    ldr h1, [x0], #4
-; GISEL-NEXT:    fmov s2, w8
 ; GISEL-NEXT:    ; kill: def $h0 killed $h0 def $s0
 ; GISEL-NEXT:    fmov w9, s0
-; GISEL-NEXT:    fcvt s3, h1
+; GISEL-NEXT:    fcvt s2, h1
 ; GISEL-NEXT:    fmov w8, s1
-; GISEL-NEXT:    fcvt s2, h2
-; GISEL-NEXT:    fcmp s3, s2
+; GISEL-NEXT:    fcmp s2, #0.0
 ; GISEL-NEXT:    csel w8, w8, w9, mi
 ; GISEL-NEXT:    strh w8, [x1]
 ; GISEL-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/dup.ll b/llvm/test/CodeGen/AArch64/dup.ll
index 079ff1076b110..1c4a6ab2217b0 100644
--- a/llvm/test/CodeGen/AArch64/dup.ll
+++ b/llvm/test/CodeGen/AArch64/dup.ll
@@ -1469,8 +1469,9 @@ define <2 x half> @loaddup_str_v2half(ptr %p) {
 ; CHECK-GI-LABEL: loaddup_str_v2half:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldr h0, [x0]
-; CHECK-GI-NEXT:    strh wzr, [x0]
+; CHECK-GI-NEXT:    movi d1, #0000000000000000
 ; CHECK-GI-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT:    str h1, [x0]
 ; CHECK-GI-NEXT:    ret
 entry:
   %a = load half, ptr %p
@@ -1526,8 +1527,9 @@ define <3 x half> @loaddup_str_v3half(ptr %p) {
 ; CHECK-GI-LABEL: loaddup_str_v3half:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldr h0, [x0]
-; CHECK-GI-NEXT:    strh wzr, [x0]
+; CHECK-GI-NEXT:    movi d1, #0000000000000000
 ; CHECK-GI-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT:    str h1, [x0]
 ; CHECK-GI-NEXT:    ret
 entry:
   %a = load half, ptr %p
@@ -1583,8 +1585,9 @@ define <4 x half> @loaddup_str_v4half(ptr %p) {
 ; CHECK-GI-LABEL: loaddup_str_v4half:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldr h0, [x0]
-; CHECK-GI-NEXT:    strh wzr, [x0]
+; CHECK-GI-NEXT:    movi d1, #0000000000000000
 ; CHECK-GI-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT:    str h1, [x0]
 ; CHECK-GI-NEXT:    ret
 entry:
   %a = load half, ptr %p
@@ -1639,8 +1642,9 @@ define <8 x half> @loaddup_str_v8half(ptr %p) {
 ; CHECK-GI-LABEL: loaddup_str_v8half:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldr h0, [x0]
-; CHECK-GI-NEXT:    strh wzr, [x0]
+; CHECK-GI-NEXT:    movi d1, #0000000000000000
 ; CHECK-GI-NEXT:    dup v0.8h, v0.h[0]
+; CHECK-GI-NEXT:    str h1, [x0]
 ; CHECK-GI-NEXT:    ret
 entry:
   %a = load half, ptr %p
@@ -1713,9 +1717,10 @@ define <16 x half> @loaddup_str_v16half(ptr %p) {
 ; CHECK-GI-LABEL: loaddup_str_v16half:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldr h1, [x0]
-; CHECK-GI-NEXT:    strh wzr, [x0]
+; CHECK-GI-NEXT:    movi d2, #0000000000000000
 ; CHECK-GI-NEXT:    dup v0.8h, v1.h[0]
 ; CHECK-GI-NEXT:    dup v1.8h, v1.h[0]
+; CHECK-GI-NEXT:    str h2, [x0]
 ; CHECK-GI-NEXT:    ret
 entry:
   %a = load half, ptr %p
@@ -1771,8 +1776,9 @@ define <2 x bfloat> @loaddup_str_v2bfloat(ptr %p) {
 ; CHECK-GI-LABEL: loaddup_str_v2bfloat:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldr h0, [x0]
-; CHECK-GI-NEXT:    strh wzr, [x0]
+; CHECK-GI-NEXT:    movi d1, #0000000000000000
 ; CHECK-GI-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT:    str h1, [x0]
 ; CHECK-GI-NEXT:    ret
 entry:
   %a = load bfloat, ptr %p
@@ -1828,8 +1834,9 @@ define <3 x bfloat> @loaddup_str_v3bfloat(ptr %p) {
 ; CHECK-GI-LABEL: loaddup_str_v3bfloat:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldr h0, [x0]
-; CHECK-GI-NEXT:    strh wzr, [x0]
+; CHECK-GI-NEXT:    movi d1, #0000000000000000
 ; CHECK-GI-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT:    str h1, [x0]
 ; CHECK-GI-NEXT:    ret
 entry:
   %a = load bfloat, ptr %p
@@ -1885,8 +1892,9 @@ define <4 x bfloat> @loaddup_str_v4bfloat(ptr %p) {
 ; CHECK-GI-LABEL: loaddup_str_v4bfloat:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldr h0, [x0]
-; CHECK-GI-NEXT:    strh wzr, [x0]
+; CHECK-GI-NEXT:    movi d1, #0000000000000000
 ; CHECK-GI-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT:    str h1, [x0]
 ; CHECK-GI-NEXT:    ret
 entry:
   %a = load bfloat, ptr %p
@@ -1941,8 +1949,9 @@ define <8 x bfloat> @loaddup_str_v8bfloat(ptr %p) {
 ; CHECK-GI-LABEL: loaddup_str_v8bfloat:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldr h0, [x0]
-; CHECK-GI-NEXT:    strh wzr, [x0]
+; CHECK-GI-NEXT:    movi d1, #0000000000000000
 ; CHECK-GI-NEXT:    dup v0.8h, v0.h[0]
+; CHECK-GI-NEXT:    str h1, [x0]
 ; CHECK-GI-NEXT:    ret
 entry:
   %a = load bfloat, ptr %p
@@ -2015,9 +2024,10 @@ define <16 x bfloat> @loaddup_str_v16bfloat(ptr %p) {
 ; CHECK-GI-LABEL: loaddup_str_v16bfloat:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldr h1, [x0]
-; CHECK-GI-NEXT:    strh wzr, [x0]
+; CHECK-GI-NEXT:    movi d2, #0000000000000000
 ; CHECK-GI-NEXT:    dup v0.8h, v1.h[0]
 ; CHECK-GI-NEXT:    dup v1.8h, v1.h[0]
+; CHECK-GI-NEXT:    str h2, [x0]
 ; CHECK-GI-NEXT:    ret
 entry:
   %a = load bfloat, ptr %p
diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll
index adc536da26f26..085170c7ba381 100644
--- a/llvm/test/CodeGen/AArch64/f16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll
@@ -782,18 +782,17 @@ define void @test_fccmp(half %in, ptr %out) {
 ;
 ; CHECK-CVT-GI-LABEL: test_fccmp:
 ; CHECK-CVT-GI:       // %bb.0:
-; CHECK-CVT-GI-NEXT:    mov w8, #17664 // =0x4500
-; CHECK-CVT-GI-NEXT:    mov w9, #18432 // =0x4800
 ; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 def $s0
-; CHECK-CVT-GI-NEXT:    fcvt s2, h0
-; CHECK-CVT-GI-NEXT:    fmov s1, w8
-; CHECK-CVT-GI-NEXT:    fmov s3, w9
-; CHECK-CVT-GI-NEXT:    fmov w9, s0
-; CHECK-CVT-GI-NEXT:    fcvt s1, h1
-; CHECK-CVT-GI-NEXT:    fcvt s3, h3
-; CHECK-CVT-GI-NEXT:    fcmp s2, s1
-; CHECK-CVT-GI-NEXT:    fccmp s2, s3, #4, mi
-; CHECK-CVT-GI-NEXT:    csel w8, w9, w8, gt
+; CHECK-CVT-GI-NEXT:    fcvt s1, h0
+; CHECK-CVT-GI-NEXT:    fmov s2, #5.00000000
+; CHECK-CVT-GI-NEXT:    adrp x8, .LCPI29_0
+; CHECK-CVT-GI-NEXT:    fmov s3, #8.00000000
+; CHECK-CVT-GI-NEXT:    fcmp s1, s2
+; CHECK-CVT-GI-NEXT:    ldr h2, [x8, :lo12:.LCPI29_0]
+; CHECK-CVT-GI-NEXT:    fmov w8, s0
+; CHECK-CVT-GI-NEXT:    fmov w9, s2
+; CHECK-CVT-GI-NEXT:    fccmp s1, s3, #4, mi
+; CHECK-CVT-GI-NEXT:    csel w8, w8, w9, gt
 ; CHECK-CVT-GI-NEXT:    strh w8, [x0]
 ; CHECK-CVT-GI-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
index 51aad4fe25d3b..743d1604388de 100644
--- a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
@@ -149,33 +149,21 @@ define i64 @fcvtzs_f64_i64_64(double %dbl) {
 }
 
 define i32 @fcvtzs_f16_i32_7(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzs_f16_i32_7:
-; CHECK-SD-NO16:       // %bb.0:
-; CHECK-SD-NO16-NEXT:    movi v1.2s, #67, lsl #24
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT:    fcvt h0, s0
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fcvtzs w0, s0
-; CHECK-SD-NO16-NEXT:    ret
+; CHECK-NO16-LABEL: fcvtzs_f16_i32_7:
+; CHECK-NO16:       // %bb.0:
+; CHECK-NO16-NEXT:    movi v1.2s, #67, lsl #24
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-NO16-NEXT:    fcvt h0, s0
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fcvtzs w0, s0
+; CHECK-NO16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: fcvtzs_f16_i32_7:
 ; CHECK-SD-FP16:       // %bb.0:
 ; CHECK-SD-FP16-NEXT:    fcvtzs w0, h0, #7
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_7:
-; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fmov s1, w8
-; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fcvtzs w0, s0
-; CHECK-GI-NO16-NEXT:    ret
-;
 ; CHECK-GI-FP16-LABEL: fcvtzs_f16_i32_7:
 ; CHECK-GI-FP16:       // %bb.0:
 ; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI8_0
@@ -189,33 +177,21 @@ define i32 @fcvtzs_f16_i32_7(half %flt) {
 }
 
 define i32 @fcvtzs_f16_i32_15(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzs_f16_i32_15:
-; CHECK-SD-NO16:       // %bb.0:
-; CHECK-SD-NO16-NEXT:    movi v1.2s, #71, lsl #24
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT:    fcvt h0, s0
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fcvtzs w0, s0
-; CHECK-SD-NO16-NEXT:    ret
+; CHECK-NO16-LABEL: fcvtzs_f16_i32_15:
+; CHECK-NO16:       // %bb.0:
+; CHECK-NO16-NEXT:    movi v1.2s, #71, lsl #24
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-NO16-NEXT:    fcvt h0, s0
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fcvtzs w0, s0
+; CHECK-NO16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: fcvtzs_f16_i32_15:
 ; CHECK-SD-FP16:       // %bb.0:
 ; CHECK-SD-FP16-NEXT:    fcvtzs w0, h0, #15
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_15:
-; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    mov w8, #30720 // =0x7800
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fmov s1, w8
-; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fcvtzs w0, s0
-; CHECK-GI-NO16-NEXT:    ret
-;
 ; CHECK-GI-FP16-LABEL: fcvtzs_f16_i32_15:
 ; CHECK-GI-FP16:       // %bb.0:
 ; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI9_0
@@ -229,33 +205,21 @@ define i32 @fcvtzs_f16_i32_15(half %flt) {
 }
 
 define i64 @fcvtzs_f16_i64_7(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzs_f16_i64_7:
-; CHECK-SD-NO16:       // %bb.0:
-; CHECK-SD-NO16-NEXT:    movi v1.2s, #67, lsl #24
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT:    fcvt h0, s0
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fcvtzs x0, s0
-; CHECK-SD-NO16-NEXT:    ret
+; CHECK-NO16-LABEL: fcvtzs_f16_i64_7:
+; CHECK-NO16:       // %bb.0:
+; CHECK-NO16-NEXT:    movi v1.2s, #67, lsl #24
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-NO16-NEXT:    fcvt h0, s0
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fcvtzs x0, s0
+; CHECK-NO16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: fcvtzs_f16_i64_7:
 ; CHECK-SD-FP16:       // %bb.0:
 ; CHECK-SD-FP16-NEXT:    fcvtzs x0, h0, #7
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_7:
-; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fmov s1, w8
-; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fcvtzs x0, s0
-; CHECK-GI-NO16-NEXT:    ret
-;
 ; CHECK-GI-FP16-LABEL: fcvtzs_f16_i64_7:
 ; CHECK-GI-FP16:       // %bb.0:
 ; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI10_0
@@ -269,33 +233,21 @@ define i64 @fcvtzs_f16_i64_7(half %flt) {
 }
 
 define i64 @fcvtzs_f16_i64_15(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzs_f16_i64_15:
-; CHECK-SD-NO16:       // %bb.0:
-; CHECK-SD-NO16-NEXT:    movi v1.2s, #71, lsl #24
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT:    fcvt h0, s0
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fcvtzs x0, s0
-; CHECK-SD-NO16-NEXT:    ret
+; CHECK-NO16-LABEL: fcvtzs_f16_i64_15:
+; CHECK-NO16:       // %bb.0:
+; CHECK-NO16-NEXT:    movi v1.2s, #71, lsl #24
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-NO16-NEXT:    fcvt h0, s0
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fcvtzs x0, s0
+; CHECK-NO16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: fcvtzs_f16_i64_15:
 ; CHECK-SD-FP16:       // %bb.0:
 ; CHECK-SD-FP16-NEXT:    fcvtzs x0, h0, #15
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_15:
-; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    mov w8, #30720 // =0x7800
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fmov s1, w8
-; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fcvtzs x0, s0
-; CHECK-GI-NO16-NEXT:    ret
-;
 ; CHECK-GI-FP16-LABEL: fcvtzs_f16_i64_15:
 ; CHECK-GI-FP16:       // %bb.0:
 ; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI11_0
@@ -453,33 +405,21 @@ define i64 @fcvtzu_f64_i64_64(double %dbl) {
 }
 
 define i32 @fcvtzu_f16_i32_7(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzu_f16_i32_7:
-; CHECK-SD-NO16:       // %bb.0:
-; CHECK-SD-NO16-NEXT:    movi v1.2s, #67, lsl #24
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT:    fcvt h0, s0
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fcvtzu w0, s0
-; CHECK-SD-NO16-NEXT:    ret
+; CHECK-NO16-LABEL: fcvtzu_f16_i32_7:
+; CHECK-NO16:       // %bb.0:
+; CHECK-NO16-NEXT:    movi v1.2s, #67, lsl #24
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-NO16-NEXT:    fcvt h0, s0
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fcvtzu w0, s0
+; CHECK-NO16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: fcvtzu_f16_i32_7:
 ; CHECK-SD-FP16:       // %bb.0:
 ; CHECK-SD-FP16-NEXT:    fcvtzu w0, h0, #7
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_7:
-; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fmov s1, w8
-; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fcvtzu w0, s0
-; CHECK-GI-NO16-NEXT:    ret
-;
 ; CHECK-GI-FP16-LABEL: fcvtzu_f16_i32_7:
 ; CHECK-GI-FP16:       // %bb.0:
 ; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI20_0
@@ -493,33 +433,21 @@ define i32 @fcvtzu_f16_i32_7(half %flt) {
 }
 
 define i32 @fcvtzu_f16_i32_15(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzu_f16_i32_15:
-; CHECK-SD-NO16:       // %bb.0:
-; CHECK-SD-NO16-NEXT:    movi v1.2s, #71, lsl #24
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT:    fcvt h0, s0
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fcvtzu w0, s0
-; CHECK-SD-NO16-NEXT:    ret
+; CHECK-NO16-LABEL: fcvtzu_f16_i32_15:
+; CHECK-NO16:       // %bb.0:
+; CHECK-NO16-NEXT:    movi v1.2s, #71, lsl #24
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-NO16-NEXT:    fcvt h0, s0
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fcvtzu w0, s0
+; CHECK-NO16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: fcvtzu_f16_i32_15:
 ; CHECK-SD-FP16:       // %bb.0:
 ; CHECK-SD-FP16-NEXT:    fcvtzu w0, h0, #15
 ; CHECK-...
[truncated]

Copy link
Contributor

@aemerson aemerson left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

@HolyMolyCowMan HolyMolyCowMan force-pushed the constant-fold-fpext-fptrunc branch from f14329f to af4a9c6 Compare October 8, 2025 15:33
@HolyMolyCowMan
Copy link
Contributor Author

Rebasing and updating tests now that #161205 is merged.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants